********************************************************************************
* matching_worker_eligible.do
* Purpose: Construct the ELIGIBLE WORKER SAMPLE for balance-table comparisons.
*
* Mirrors matching_firm_eligible.do at the worker level. Retains all treated
* workers satisfying eligibility criteria (whether matched or not), plus
* matched control workers. Used in Table A9/A10 to assess pre-matching
* balance and to report unmatched worker characteristics.
*
* Output: $data/worker_eligible.dta        (event-study panel, eligible sample)
*         $data/worker_eligible_list.dta   (IDs of eligible workers)
********************************************************************************
* First and last pre-event year (year_prior) used by the loops that build the
* eligible-worker lists.
global start_year 2005
global end_year 2016

**# matching
* For every pre-event year `y' in [$start_year, $end_year]: apply the worker
* eligibility filters to that year's worker file, remove workers already in
* the matched list, keep treated workers, and save the resulting ID list to
* $data/worker_eligible_unmatched_list_`y'.
forvalues y = $start_year/$end_year {

	* -use- only accepts -clear- (and -nolabel-); the former -replace- option
	* is rejected by Stata with r(198).
	use $data/worker_`y', clear
	
	* the calendar year of this file is the candidate pre-event year
	gen year_prior	=	year
	
	* merge in eligible treated firms and matched control firms
	* (joinby's default keeps only rows present in both datasets)
	joinby entid_syn year_prior using $data/firm_eligible_list
	
	
	
	** Keep worker if earn more than ~4k CAD in real wage **
	* total_wage sums t4earn over all of the worker's remaining rows
	gegen 	total_wage 		= sum(t4earn), by(casenum2019)
	gen		real_total_wage = total_wage/CPI_base_2011
	drop if real_total_wage < 3900 | mi(real_total_wage)
	
	* drop moonlighters
	drop if moonlighter == 1

	* drop workers with less than 4 years of tenure and gaps in their employment
	drop 	if max_gap > 1 | mi(max_gap)					// drop workers with gaps in their employment
	* present_at_treated == 1 when the worker is still at the firm in year `y' + 1
	gen 	present_at_treated = (`y' + 1 <= last_year_at_firm)
	drop 	if present_at_treated == 0 						// drop workers that leave firms at t = 0
	drop 	if tenure < 4									// drop workers with less than 4 years of tenure
	
	* require the covariates listed here to be non-missing
	drop if mi(t1_age_recorded) | mi(t1_sex_recorded) | mi(naics2) | mi(OPAddressProvince)
	
	* keep(1): retain only workers that are NOT in the matched list, i.e. the
	* eligible-but-unmatched ones (matched workers are appended back later)
	merge 1:1 casenum2019 year_prior using $data/worker_matched_list, keep(1) nogen
	* control workers are only retained through the matched list
	drop if treated == 0

	keep casenum2019 entid_syn treated year_prior
	
	save $data/worker_eligible_unmatched_list_`y',  replace 
	
}

* Assemble the eligible-worker ID list: stack the yearly unmatched-treated
* lists, then add every worker from the matched list (which also carries pairid).
drop _all
forvalues y = ${start_year}/${end_year} {
	display "`y'"
	append using $data/worker_eligible_unmatched_list_`y', force keep(casenum2019 entid_syn treated year_prior)
}

append using $data/worker_matched_list, force keep(casenum2019 entid_syn treated pairid year_prior)

compress
save $data/worker_eligible_list, replace


**# eligible treated workers + matched control workers panel
* For each calendar year 2001-2017: attach the eligible-list rows to that
* year's worker file, reduce to one row per worker x year_prior x pairid, and
* save a temporary yearly file $data/worker_matched_`y'.
forvalues y = 2001/2017 {

	use $data/worker_`y', clear
	
	* park the current-year employer ID so that the entid_syn brought in from
	* the eligible list below does not collide with it
	rename entid_syn temp_id
	
	* keep workers that appear in the eligible list (one output row per
	* worker x eligible-list entry; unmatched rows on either side are dropped)
	joinby casenum2019 	using $data/worker_eligible_list, unmatched(none)
	* NOTE(review): keep(1 3) retains every master row and keepusing(entid_syn)
	* adds no new variables, so as written this merge neither filters nor adds
	* anything -- confirm whether keep(3) or an extra variable was intended.
	merge m:1 entid_syn using $data/first_mna, keep(1 3) keepusing(entid_syn) nogen

	** keep workers at dominant (M&A) firms **
	* same_firm == 1 when the current-year employer equals the eligible-list firm
	gen 	same_firm = (temp_id == entid_syn)
	* restore entid_syn to the current-year employer ID
	drop 	entid_syn
	rename	temp_id entid_syn
	
	* total earnings across all of the worker's rows in this year, computed
	* before the duplicate rows are removed
	gegen total_wage = sum(t4earn), by(casenum2019 pairid year_prior)
	
	* order rows so the eligible-list firm comes first, then higher t4earn;
	* duplicates drop keeps the first row of each group
	gsort casenum2019 year_prior -same_firm -t4earn entid_syn
	duplicates drop casenum2019 year_prior pairid, force
	
	drop same_firm
	
	compress
	save $data/worker_matched_`y', replace
}

* Stack the temporary yearly files into a single panel, deleting each file
* right after it has been appended.
drop _all
forvalues y = 2001(1)2017 {
	append using $data/worker_matched_`y', force
	erase $data/worker_matched_`y'.dta
}

* intermediate copy of the stacked panel, before derived variables are added
save $data/worker_eligible_intermid, replace

**# matched worker panel
* within each pair and event year: treated rows first, then by calendar year
gsort +pairid +year_prior -treated +year

** ID variables
* worker_id / firm_id: consecutive integer codes for casenum2019 / entid_syn;
* id: one code per worker x event year (a worker may appear for several year_prior)
gegen worker_id	= group(casenum2019)
gegen firm_id 	= group(entid_syn)
gegen id 		= group(worker_id year_prior)

* workers from the matched list carry a pairid; unmatched ones have it missing
gen matched = !missing(pairid)

**# Time Variables
* Event time measured from year_prior + 1, so t == -1 is the year_prior
* observation itself.
gen 	t = year - (year_prior + 1)
* pool the tails: everything beyond +/-5 goes into the +/-6 bins
replace t = 6 	if t >  5 	& ~mi(t)
replace t = -6 	if t < -5 	& ~mi(t)

* one dummy per observed level of t, created in ascending order of t
tab t, gen(ds_)
levelsof t, local(ts)

* Label each dummy with the level of t it actually represents and zero out the
* t == -1 dummy (presumably the omitted reference period -- confirm).
* Walking the observed levels instead of assuming ds_1..ds_13 map to -6..6
* keeps labels and the zeroed dummy correct even if some level is absent.
local i = 0
foreach v of local ts {
	local ++i
	label variable ds_`i' "`v'"
	if `v' == -1 {
		replace ds_`i' = 0
	}
}

**# Sector
* Rebuild naics2 from naics and pool selected codes into broader groups.
destring naics, replace
drop naics2
* assumes naics holds a 4-digit code, so dividing by 100 gives the 2-digit sector -- TODO confirm
gen 	naics2 = int(naics/100)
replace naics2 = 31 if inlist(naics2, 32, 33)
replace naics2 = 44 if naics2 == 45
replace naics2 = 48 if naics2 == 49
replace naics2 = 54 if inlist(naics2, 56, 61, 62)

** identify the sector prior to the event
* values observed in the year_prior row (naics2_event only for treated workers)
gen naics2_event 	= naics2	if treated == 1	& year == year_prior
gen naics_event		= naics		if year == year_prior
gen firm_event 		= firm_id 	if year == year_prior
* spread the year_prior values to every row of the same worker x event year
gegen naics_tmp 	= firstnm(naics_event), by(id)
gegen firmid_tmp 	= firstnm(firm_event),  by(id)
* while the worker is still at the year_prior firm (up to year_prior + 1),
* overwrite naics with the firm's year_prior code; naics2 above is not recomputed
replace naics 		= naics_tmp if firm_id == firmid_tmp & year <= year_prior + 1

** merge in mna characteristics
* attach Acquirer from the first-M&A file, keeping every panel row
merge	m:1 entid_syn 	using 	$data/first_mna, keep(master match) keepusing(Acquirer) nogenerate
* retain Acquirer only on the treated worker's t == -1 row
replace Acquirer = . 		if t != -1 | treated != 1

**# pair-level variables and unmatched-worker variables
gsort +pairid +year_prior -treated +year

* matched_acq: Acquirer value shared by the whole pair (treated and control);
* unmatched workers have no pairid, so they take the value from their own id
gegen matched_acq_tmp	= firstnm(Acquirer),		by(id)
gegen matched_acq 		= firstnm(Acquirer),		by(pairid year_prior)
replace matched_acq 	= matched_acq_tmp if matched == 0

compress
save $data/worker_eligible, replace